Authors : Tracy Rabilloud*, Delphine Potier*, Saran Pankew, Mathis Nozais, Marie Loosveld§, Dominique Payet-Bornet§

* Equal contribution

§ Corresponding authors

PMID: to come

Raw data and intermediate data matrices are available in SRA/GEO (SRP269742 / GSE153697)

Docker images and intermediate seurat object are available in zenodo. Any questions on this analysis, please contact Delphine Potier

9q deletion

Stringeant gamma threshold

  • gamma threshold = 9

In the CaSpER paper figure 4C (see https://www.nature.com/articles/s41467-019-13779-x), a Ɣ threshold of 9 correspond to a thr of 1 in the extractLargeScaleEvents() function. Indeed, in the function, mergeScalesDel variable is divided by the object length which is 9 (9/9 = 1) (“finalChrMat[(mergeScalesDel/length(final.objects)) >= thr] <- (-1)”)

UMAP

# Plot 9q deletion score
## Clones of interest : Get barcodes for T1-CD19neg clones located in the T1 tumoral area (Cluster 0) for highlighting
Idents(Seurat) <- "RNA_snn_res.0.1"
HTO_T1CD19n_in_tumor <- intersect(row.names(subset(Seurat@meta.data, hash.ID == "T1-CD19neg")), WhichCells(Seurat, slot = "RNA_snn_res.0.1", idents = c("0","1")))

## Add 9q status in the metadata
Seurat@meta.data$chr9q <- "nothing"
Seurat@meta.data$chr9q <- finalChrMat[,"9q"]

## Draw UMAP
### Get the coordinates & values
Umap<-data.frame(
UMAP_1 = Seurat@reductions$umap@cell.embeddings[,1],
UMAP_2= Seurat@reductions$umap@cell.embeddings[,2],
cnv= Seurat@meta.data$chr9q)

HTO= Seurat@meta.data$hash.ID
Max=max(Seurat@meta.data$chr9q)
Min=min(Seurat@meta.data$chr9q)

ggplot(Umap,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=as.factor(cnv),shape=HTO),size = 0.8)+ scale_color_manual(name = "Chr9q", labels = c("Deletion","Neutral","Amplification"),values=c("blue", "grey", "orange"))+geom_point(data = Umap[HTO_T1CD19n_in_tumor,],aes(x=UMAP_1,y=UMAP_2, fill = as.factor(cnv)),shape = 21,size = 3, color = "red")+ scale_fill_manual(values=c("blue", "grey", "orange")) 

Interactive UMAP

### Plot an interactive UMAP
ggplotly(ggplot(Umap,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=as.factor(cnv),shape=HTO),size = 0.6)+ scale_color_manual(name = "Chr9q", labels = c("Deletion","Neutral","Amplification"),values=c("blue", "grey", "orange"))+geom_point(data = Umap[HTO_T1CD19n_in_tumor,],aes(x=UMAP_1,y=UMAP_2, fill = as.factor(cnv)),shape = 21,size = 2, color = "red")+ scale_fill_manual(values=c("blue", "grey", "orange")))

9q deletion summary

  1. Number of 9q large scale event by group of cells
C0_chr9q_del_counts <- table(factor(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "0"),]$chr9q, levels = -1:1))

C1_chr9q_del_counts <- table(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "1"),]$chr9q)

C2345_chr9q_del_counts <- table(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "2" | Seurat@meta.data$RNA_snn_res.0.1 == "3" | Seurat@meta.data$RNA_snn_res.0.1 == "4" | Seurat@meta.data$RNA_snn_res.0.1 =="5"),]$chr9q)

 
chr9q_table <- matrix(data = c(C0_chr9q_del_counts,C1_chr9q_del_counts,C2345_chr9q_del_counts), nrow = 3,ncol = 3 )
rownames(chr9q_table) <- c("Deletion","Neutral","Amplification")
colnames(chr9q_table) <- c("Cluster 0","Cluster 1","Clusters 2, 3, 4, 5")

chr9q_table
##               Cluster 0 Cluster 1 Clusters 2, 3, 4, 5
## Deletion            872       640                   1
## Neutral             262       152                 988
## Amplification         0         2                   2
  1. Number of 9q large scale event for T1CD19neg cells in tumoral clusters (0 & 1)
Seurat20c <- subset(Seurat, cells = HTO_T1CD19n_in_tumor)

HTO_T1CD19n_in_tumor_chr9q_del_counts <- table(factor(Seurat20c@meta.data[,]$chr9q, levels = -1:1))

HTO_T1CD19n_in_tumor_chr9q_table <- matrix(data = c(HTO_T1CD19n_in_tumor_chr9q_del_counts), nrow = 3,ncol = 1 )
rownames(HTO_T1CD19n_in_tumor_chr9q_table) <- c("Deletion","Neutral","Amplification")
colnames(HTO_T1CD19n_in_tumor_chr9q_table) <- c("T1CD19neg cells in tumoral clusters (0 & 1)")

HTO_T1CD19n_in_tumor_chr9q_table
##               T1CD19neg cells in tumoral clusters (0 & 1)
## Deletion                                               10
## Neutral                                                10
## Amplification                                           0
  1. Proportions of 9q large scale event by group of cells
round(prop.table(chr9q_table,2)*100,2)
##               Cluster 0 Cluster 1 Clusters 2, 3, 4, 5
## Deletion           76.9     80.60                 0.1
## Neutral            23.1     19.14                99.7
## Amplification       0.0      0.25                 0.2

Relaxed gamma threshold

  • gamma threshold = 5
## summarize large scale events 
finalChrMat2 <- extractLargeScaleEvents (final.objects, thr=0.65) # less stringeant, increase the sensitivity but deacrease specificity, by default thr is est at 0.5

# Plot 9q deletion score
Seurat@meta.data$chr9q <- "nothing"
Seurat@meta.data$chr9q <- finalChrMat2[,"9q"]

  Umap<-data.frame(
    UMAP_1 = Seurat@reductions$umap@cell.embeddings[,1],
    UMAP_2= Seurat@reductions$umap@cell.embeddings[,2],
    cnv= Seurat@meta.data$chr9q
  )
  HTO= Seurat@meta.data$hash.ID
  Max=max(Seurat@meta.data$chr9q)
  Min=min(Seurat@meta.data$chr9q)

#Non interactive UMAP
ggplot(Umap,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=as.factor(cnv),shape=HTO),size = 0.8)+ scale_color_manual(name = "Chr9q", labels = c("Deletion","Neutral","Amplification"),values=c("blue", "grey", "orange"))+geom_point(data = Umap[HTO_T1CD19n_in_tumor,],aes(x=UMAP_1,y=UMAP_2, fill = as.factor(cnv)),shape = 21,size = 3, color = "red")+ scale_fill_manual(values=c("blue", "grey", "orange"))      

Interactive UMAP

#Interactive UMAP
ggplotly(ggplot(Umap,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=as.factor(cnv),shape=HTO),size = 0.6)+ scale_color_manual(name = "Chr9q", labels = c("Deletion","Neutral","Amplification"),values=c("blue", "grey", "orange"))+geom_point(data = Umap[HTO_T1CD19n_in_tumor,],aes(x=UMAP_1,y=UMAP_2, fill = as.factor(cnv)),shape = 21,size = 2, color = "red")+ scale_fill_manual(values=c("blue", "grey", "orange")))

9q deletion summary

  1. Number of 9q large scale event by group of cells
C0_chr9q_del_counts <- table(factor(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "0"),]$chr9q, levels = -1:1))

C1_chr9q_del_counts <- table(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "1"),]$chr9q)

C2345_chr9q_del_counts <- table(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "2" | Seurat@meta.data$RNA_snn_res.0.1 == "3" | Seurat@meta.data$RNA_snn_res.0.1 == "4" | Seurat@meta.data$RNA_snn_res.0.1 =="5"),]$chr9q)

 
chr9q_table <- matrix(data = c(C0_chr9q_del_counts,C1_chr9q_del_counts,C2345_chr9q_del_counts), nrow = 3,ncol = 3 )
rownames(chr9q_table) <- c("Deletion","Neutral","Amplification")
colnames(chr9q_table) <- c("Cluster 0","Cluster 1","Clusters 2, 3, 4, 5")

chr9q_table
##               Cluster 0 Cluster 1 Clusters 2, 3, 4, 5
## Deletion           1020       726                   1
## Neutral             113        65                 979
## Amplification         1         3                  11
  1. Number of 9q large scale event for T1CD19neg cells in tumoral clusters (0 & 1)
Seurat20c <- subset(Seurat, cells = HTO_T1CD19n_in_tumor)

HTO_T1CD19n_in_tumor_chr9q_del_counts <- table(factor(Seurat20c@meta.data[,]$chr9q, levels = -1:1))

HTO_T1CD19n_in_tumor_chr9q_table <- matrix(data = c(HTO_T1CD19n_in_tumor_chr9q_del_counts), nrow = 3,ncol = 1 )
rownames(HTO_T1CD19n_in_tumor_chr9q_table) <- c("Deletion","Neutral","Amplification")
colnames(HTO_T1CD19n_in_tumor_chr9q_table) <- c("T1CD19neg cells in tumoral clusters (0 & 1)")

HTO_T1CD19n_in_tumor_chr9q_table
##               T1CD19neg cells in tumoral clusters (0 & 1)
## Deletion                                               16
## Neutral                                                 4
## Amplification                                           0
  1. Proportions of 9q large scale event by group of cells
round(prop.table(chr9q_table,2)*100,2)
##               Cluster 0 Cluster 1 Clusters 2, 3, 4, 5
## Deletion          89.95     91.44                0.10
## Neutral            9.96      8.19               98.79
## Amplification      0.09      0.38                1.11

Check specific BAF

# Load bafextract results (here "cluster0_T1CD19neg_baf_mincov10_2_05", "cluster0_T1CD19neg_baf_mincov10", "cluster0_T1CD19neg_baf_mincov6" & "cluster0_T1CD19neg_baf")
loh_T1CD19n_in_tumor <- readBAFExtractOutput(path = OTHER_BAF_PATH, sequencing.type = "single-cell")
names(loh_T1CD19n_in_tumor) <- gsub(".snp","", names(loh_T1CD19n_in_tumor))

rs10900809

obj <- final.objects[[9]]

#Check for some specific BAF
## rs10900809
snp132490630_c0 <- obj@loh$cluster0_baf[which(obj@loh$cluster0_baf$chr == "5" & obj@loh$cluster0_baf$position == "132490630"),]

snp132490630_c1 <- obj@loh$cluster1_baf[which(obj@loh$cluster1_baf$chr == "5" & obj@loh$cluster1_baf$position == "132490630"),]

snp132490630_c2_3_4_5 <- obj@loh$clusters_2_3_4_5_baf[which(obj@loh$clusters_2_3_4_5_baf$chr == "5" & obj@loh$clusters_2_3_4_5_baf$position == "132490630"),]

snp_freq_table <- matrix(data = c(snp132490630_c0$alt,snp132490630_c1$alt,snp132490630_c2_3_4_5$alt,snp132490630_c0$ref,snp132490630_c1$ref,snp132490630_c2_3_4_5$ref,snp132490630_c0$coverage,snp132490630_c1$coverage,snp132490630_c2_3_4_5$coverage,snp132490630_c0$baf,snp132490630_c1$baf,snp132490630_c2_3_4_5$baf), nrow = 3,ncol = 4 )

rownames(snp_freq_table) <- c("Cluster 0","Cluster 1","Clusters 2,3,4,5")
colnames(snp_freq_table) <- c("Alt","Ref","Coverage","BAF")
snp_freq_table
##                  Alt Ref Coverage       BAF
## Cluster 0        456   7      463 0.9848812
## Cluster 1        641  12      653 0.9816233
## Clusters 2,3,4,5 447 365      812 0.5504926
snp132490630_tumT1CD19neg <- loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6[which(loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$chr == "5" & loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$position == "132490630"),]

rownames(snp132490630_tumT1CD19neg) <- "Tumoral T1CD19negative cells"
snp132490630_tumT1CD19neg[,3:6]
##                              alt ref coverage baf
## Tumoral T1CD19negative cells   8   0        8   1

rs4705403

snp150000930_c0 <- obj@loh$cluster0_baf[which(obj@loh$cluster0_baf$chr == "5" & obj@loh$cluster0_baf$position == "150000930"),]

snp150000930_c1 <- obj@loh$cluster1_baf[which(obj@loh$cluster1_baf$chr == "5" & obj@loh$cluster1_baf$position == "150000930"),]

snp150000930_c2_3_4_5 <- obj@loh$clusters_2_3_4_5_baf[which(obj@loh$clusters_2_3_4_5_baf$chr == "5" & obj@loh$clusters_2_3_4_5_baf$position == "150000930"),]



snp_freq_table <- matrix(data = c(snp150000930_c0$alt,snp150000930_c1$alt,snp150000930_c2_3_4_5$alt,snp150000930_c0$ref,snp150000930_c1$ref,snp150000930_c2_3_4_5$ref,snp150000930_c0$coverage,snp150000930_c1$coverage,snp150000930_c2_3_4_5$coverage,snp150000930_c0$baf,snp150000930_c1$baf,snp150000930_c2_3_4_5$baf), nrow = 3,ncol = 4 )

rownames(snp_freq_table) <- c("Cluster 0","Cluster 1","Clusters 2,3,4,5")
colnames(snp_freq_table) <- c("Alt","Ref","Coverage","BAF")
snp_freq_table
##                  Alt Ref Coverage       BAF
## Cluster 0        333   1      334 0.9970060
## Cluster 1        259   4      263 0.9847909
## Clusters 2,3,4,5  62  51      113 0.5486726
### loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6[which(loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$chr == "5" & loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$position == "150402064"),]

print("Less than 6 reads at chr5:150000930 for tumoral T1CD19neg cells")
## [1] "Less than 6 reads at chr5:150000930 for tumoral T1CD19neg cells"

rs1056400

snp150402064_c0 <- obj@loh$cluster0_baf[which(obj@loh$cluster0_baf$chr == "5" & obj@loh$cluster0_baf$position == "150402064"),]

snp150402064_c1 <- obj@loh$cluster1_baf[which(obj@loh$cluster1_baf$chr == "5" & obj@loh$cluster1_baf$position == "150402064"),]

snp150402064_c2_3_4_5 <- obj@loh$clusters_2_3_4_5_baf[which(obj@loh$clusters_2_3_4_5_baf$chr == "5" & obj@loh$clusters_2_3_4_5_baf$position == "150402064"),]

snp_freq_table <- matrix(data = c(snp150402064_c0$alt,snp150402064_c1$alt,snp150402064_c2_3_4_5$alt,snp150402064_c0$ref,snp150402064_c1$ref,snp150402064_c2_3_4_5$ref,snp150402064_c0$coverage,snp150402064_c1$coverage,snp150402064_c2_3_4_5$coverage,snp150402064_c0$baf,snp150402064_c1$baf,snp150402064_c2_3_4_5$baf), nrow = 3,ncol = 4 )

rownames(snp_freq_table) <- c("Cluster 0","Cluster 1","Clusters 2,3,4,5")
colnames(snp_freq_table) <- c("Alt","Ref","Coverage","BAF")
snp_freq_table
##                   Alt Ref Coverage       BAF
## Cluster 0        1477  22     1499 0.9853235
## Cluster 1        1013  26     1039 0.9749759
## Clusters 2,3,4,5  706 709     1415 0.4989399
snp150402064_tumT1CD19neg <- loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6[which(loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$chr == "5" & loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$position == "150402064"),]
rownames(snp150402064_tumT1CD19neg) <- "Tumoral T1CD19negative cells"
snp150402064_tumT1CD19neg[,3:6]
##                              alt ref coverage baf
## Tumoral T1CD19negative cells   7   0        7   1

rs4841

snp150446963_c0 <- obj@loh$cluster0_baf[which(obj@loh$cluster0_baf$chr == "5" & obj@loh$cluster0_baf$position == "150446963"),]

snp150446963_c1 <- obj@loh$cluster1_baf[which(obj@loh$cluster1_baf$chr == "5" & obj@loh$cluster1_baf$position == "150446963"),]

snp150446963_c2_3_4_5 <- obj@loh$clusters_2_3_4_5_baf[which(obj@loh$clusters_2_3_4_5_baf$chr == "5" & obj@loh$clusters_2_3_4_5_baf$position == "150446963"),]

snp_freq_table <- matrix(data = c(snp150446963_c0$alt,snp150446963_c1$alt,snp150446963_c2_3_4_5$alt,snp150446963_c0$ref,snp150446963_c1$ref,snp150446963_c2_3_4_5$ref,snp150446963_c0$coverage,snp150446963_c1$coverage,snp150446963_c2_3_4_5$coverage,snp150446963_c0$baf,snp150446963_c1$baf,snp150446963_c2_3_4_5$baf), nrow = 3,ncol = 4 )

rownames(snp_freq_table) <- c("Cluster 0","Cluster 1","Clusters 2,3,4,5")
colnames(snp_freq_table) <- c("Alt","Ref","Coverage","BAF")
snp_freq_table
##                   Alt   Ref Coverage       BAF
## Cluster 0        5142   306     5448 0.9438326
## Cluster 1        5850   418     6268 0.9333121
## Clusters 2,3,4,5 4765 16797    21562 0.2209906
snp150446963_tumT1CD19neg <- loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6[which(loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$chr == "5" & loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$position == "150446963"),]
rownames(snp150446963_tumT1CD19neg) <- "Tumoral T1CD19negative cells"
snp150446963_tumT1CD19neg[,3:6]
##                              alt ref coverage       baf
## Tumoral T1CD19negative cells  80   7       87 0.9195402
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 10 (buster)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.3.5.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] CaSpER_0.2.0         GOstats_2.52.0       graph_1.64.0        
##  [4] Category_2.52.1      Matrix_1.2-18        org.Hs.eg.db_3.10.0 
##  [7] GO.db_3.10.0         AnnotationDbi_1.48.0 Biobase_2.46.0      
## [10] limma_3.42.2         biomaRt_2.42.1       ape_5.3             
## [13] ggnetwork_0.5.8      intergraph_2.0-2     igraph_1.2.5        
## [16] gridExtra_2.3        scales_1.1.0         mclust_5.4.6        
## [19] reshape_0.8.8        RColorBrewer_1.1-2   pheatmap_1.0.12     
## [22] signal_0.7-6         Rcpp_1.0.4.6         GenomicRanges_1.38.0
## [25] GenomeInfoDb_1.22.1  IRanges_2.20.2       S4Vectors_0.24.4    
## [28] BiocGenerics_0.32.0  ggpubr_0.2.5         magrittr_1.5        
## [31] plotly_4.9.2.1       ggplot2_3.3.2.9000   Seurat_3.1.5        
## 
## loaded via a namespace (and not attached):
##   [1] BiocFileCache_1.10.2   plyr_1.8.6             lazyeval_0.2.2        
##   [4] GSEABase_1.48.0        splines_3.6.3          crosstalk_1.1.0.1     
##   [7] listenv_0.8.0          digest_0.6.25          htmltools_0.4.0       
##  [10] gdata_2.18.0           memoise_1.1.0          cluster_2.1.0         
##  [13] ROCR_1.0-7             globals_0.12.5         annotate_1.64.0       
##  [16] askpass_1.1            prettyunits_1.1.1      colorspace_1.4-1      
##  [19] blob_1.2.1             rappdirs_0.3.1         ggrepel_0.8.2         
##  [22] xfun_0.13              dplyr_0.8.5            crayon_1.3.4          
##  [25] RCurl_1.98-1.2         jsonlite_1.6.1         genefilter_1.68.0     
##  [28] survival_3.1-8         zoo_1.8-7              glue_1.4.0            
##  [31] gtable_0.3.0           zlibbioc_1.32.0        XVector_0.26.0        
##  [34] leiden_0.3.3           Rgraphviz_2.30.0       future.apply_1.5.0    
##  [37] DBI_1.1.0              viridisLite_0.3.0      xtable_1.8-4          
##  [40] progress_1.2.2         reticulate_1.15        bit_4.0.4             
##  [43] rsvd_1.0.3             tsne_0.1-3             AnnotationForge_1.28.0
##  [46] htmlwidgets_1.5.1      httr_1.4.1             gplots_3.0.3          
##  [49] ellipsis_0.3.0         ica_1.0-2              farver_2.0.3          
##  [52] pkgconfig_2.0.3        XML_3.99-0.3           uwot_0.1.8            
##  [55] dbplyr_1.4.3           labeling_0.3           tidyselect_1.0.0      
##  [58] rlang_0.4.5            reshape2_1.4.4         munsell_0.5.0         
##  [61] tools_3.6.3            RSQLite_2.2.1          ggridges_0.5.2        
##  [64] evaluate_0.14          stringr_1.4.0          yaml_2.2.1            
##  [67] npsurv_0.4-0           knitr_1.28             bit64_4.0.5           
##  [70] fitdistrplus_1.0-14    caTools_1.18.0         purrr_0.3.4           
##  [73] RANN_2.6.1             pbapply_1.4-2          RBGL_1.62.1           
##  [76] future_1.17.0          nlme_3.1-144           compiler_3.6.3        
##  [79] curl_4.3               png_0.1-7              lsei_1.2-0            
##  [82] ggsignif_0.6.0         tibble_3.0.1           stringi_1.4.6         
##  [85] lattice_0.20-38        vctrs_0.2.4            pillar_1.4.3          
##  [88] lifecycle_0.2.0        lmtest_0.9-37          RcppAnnoy_0.0.16      
##  [91] data.table_1.12.8      cowplot_1.0.0          bitops_1.0-6          
##  [94] irlba_2.3.3            patchwork_1.0.0        R6_2.4.1              
##  [97] network_1.16.0         KernSmooth_2.23-16     codetools_0.2-16      
## [100] MASS_7.3-51.5          gtools_3.8.2           assertthat_0.2.1      
## [103] openssl_1.4.1          withr_2.2.0            sctransform_0.2.1     
## [106] GenomeInfoDbData_1.2.2 hms_0.5.3              tidyr_1.0.2           
## [109] rmarkdown_2.1          Rtsne_0.15